knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.align = 'center') knitr::opts_knit$set(root.dir = "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex")
library(data.table) library(GenomicFeatures) library(GenomicAlignments) library(BiocParallel) library(ggplot2) library(patchwork) library(BioHelper)
tro <- readGAlignments("/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/RTA-03.minimap2genome.bam") sch <- readGAlignments("/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/RTA-17.minimap2genome.bam")
ref <- readDNAStringSet("/mnt/raid61/Personal_data/songjunwei/reference/plasmoDB/fa/PlasmoDB-53_PbergheiANKA_Genome.fasta") names(ref) <- mapply(function(x) x[1], strsplit(names(ref), " \\| "))
txdb <- makeTxDbFromGFF("/mnt/raid61/Personal_data/songjunwei/reference/plasmoDB/gff/PlasmoDB-53_PbergheiANKA.gff") introns0 <- unlist(intronsByTranscript(txdb))
bamFiles <- list.files("/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/", "bam$", full.names = TRUE)
params <- Rsamtools::ScanBamParam(flag = Rsamtools::scanBamFlag(isNotPassingQualityControls = FALSE), what = c("mapq", "flag"), mapqFilter = 0) mclapply(bamFiles, function(bam) { # if(file.exists(gsub(".bam$", ".SJ.tab", bam))) return(NULL) map0 <- GenomicAlignments::readGAlignments(file = bam, param = params, use.names = FALSE) # map0 <- map0[with(map0, !flag %in% c(2048, 2064))] junc <- junctions(map0) junc <- unlist(junc) junc <- table(junc) junc <- data.table(SJ = names(junc), N = as.integer(junc)) junc <- GRanges(as(junc[, SJ], "GRanges"), N = junc[, N]) junc$annotation <- as.integer(as.character(junc) %in% as.character(introns0)) junc <- DonorSiteSeq(junc, ref, exon = 0, intron = 2) junc <- AcceptorSiteSeq(junc, ref, exon = 0, intron = 2) intron_motif <- with(junc, paste0(DonorMotif, AcceptorMotif)) intron_motif[intron_motif == "GTAG"] <- 1 intron_motif[intron_motif == "CTAC"] <- 2 intron_motif[intron_motif == "GCAG"] <- 3 intron_motif[intron_motif == "CTGC"] <- 4 intron_motif[intron_motif == "ATAC"] <- 5 intron_motif[intron_motif == "GTAT"] <- 6 intron_motif[!intron_motif %in% 1:6] <- 0 junc$IntronMotif <- as.integer(intron_motif) junc$DonorMotif <- NULL junc$AcceptorMotif <- NULL c1 <- with(junc, annotation == 1) c2 <- with(junc, annotation == 0 & IntronMotif %in% c(1, 3, 5) & N > 1) c3 <- with(junc, annotation == 0 & !IntronMotif %in% c(1, 3, 5) & N > 10) junc$Filter <- as.numeric(c1 | c2 | c3) fwrite(as.data.table(junc), gsub(".bam$", ".SJ.tab", bam), row.names = F, quote = F, sep = "\t") }, mc.cores = 8)
SJFiles <- list.files("/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/", ".SJ.tab$", full.names = TRUE) SJList <- lapply(SJFiles[c(1, 4)], fread) SJList2 <- lapply(SJList, function(x) x[Filter == 1]) SJTab <- data.table(Stage = rep(c("Trophozoite", "Schizont"), mapply(nrow, SJList2)), do.call(rbind, SJList2))
Mat1 <- SJTab[, .N, by = c("Stage", "annotation")] Mat1$P <- round(Mat1[, prop.table(N), by = Stage]$V1 * 100, 2) Mat1$L <- paste0(Mat1$N, "\n(", Mat1$P, "%)") Mat1[annotation == 0, L := N] ggplot(Mat1, mapping = aes(x = Stage, fill = factor(annotation), y = N)) + geom_col() + geom_text(aes(label = L), position = position_stack(vjust = 0.5)) + theme_bw(base_size = 15) + guides(fill = guide_legend(title = "annotation")) + theme(legend.position = "top") + labs(y = "Number of SJ") -> p1 p1 ggsave("/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/m6A_AS/No.SJ.pdf", width = 3, height = 4)
SJTab[, SJ := paste0(seqnames, ":", start, "-", end, ":", strand)] NovelSJ <- with(SJTab[annotation == 0, ], split(SJ, Stage)) VennCustom2(input = NovelSJ) + labs(title = "Novel SJ") + theme(title = element_text(size = 16)) ggsave("/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/m6A_AS/Novel.SJ.pdf", width = 4, height = 4)
newsj <- SJTab[annotation == 0 & N > 5, .SD[.N == 1, ], SJ] newsj2 <- lapply(seq_len(nrow(newsj)), function(i) { r1 <- SJTab[seqnames == newsj[i, seqnames] & (start == newsj[i, start] | end == newsj[i, end]) & Stage != newsj[i, Stage] & annotation == 1] r2 <- SJTab[Stage != newsj[i, Stage] & SJ == newsj[i, SJ]] if(nrow(r1) > 0) { rbind(r1, r2) } else { NULL } })
case1 <- rbind(SJTab[SJ == newsj2[[43]]$SJ], newsj[43, ]) case2 <- rbind(SJTab[SJ == newsj2[[80]]$SJ], newsj[80, ])
met <- fread("/mnt/raid61/Personal_data/songjunwei/DRS_RTA/batch4_f5_fq/barcodediff_res.csv")[, -1] met <- met[barcode %in% c("RTA03", "RTA17")]
metsites <- with(met, split(name, sample)) names(metsites) <- c("Schizont", "Trophozoite") VennCustom2(input = metsites) + labs(title = "m6A") + theme(title = element_text(size = 16)) ggsave("/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/m6A_AS/m6A.pdf", width = 5, height = 4)
Meth <- fread("/mnt/raid61/Personal_data/songjunwei/DRS_RTA/batch4_f5_fq/barcodediff_res.csv")[, -1] Meth[, ratio := as.numeric(ratio)] # hv <- Meth[, sd(ratio, na.rm = T), name][!is.na(V1)][order(V1, decreasing = T)][1:50, name] MethTab <- dcast(data = Meth, formula = name + gene ~ barcode, value.var = "ratio") # MethTab <- MethTab[name %in% hv] MethTab <- data.frame(MethTab[, -c(1:2)], row.names = paste0(MethTab[[1]], "_", MethTab[[2]])) hv <- data.table(ID = rownames(MethTab), sd = apply(MethTab, 1, sd, na.rm = T)) hv <- hv[!is.na(sd)][order(sd, decreasing = T)][, ID] length(hv) MethTab[is.na(MethTab)] <- 0 library(FactoMineR) pca_res <- PCA(t(MethTab[hv, ]), ncp = 3, graph = F) library(FactoMineR) pca_result <- data.frame(pca_res$svd$U, Name = colnames(MethTab)) pca_result$name <- gsub("RTA", "RTA-", pca_result$Name) pca_result$Stage <- plyr::mapvalues(pca_result$Name, c("RTA03", "RTA10", "RTA16", "RTA17", "RTA24", "RTA32"), c("Trophozoite", "Trophozoite", "Trophozoite", "Schizont", "Schizont", "Schizont")) library(ggrepel) ggplot(pca_result, aes(x = X1, y = X2, color = Stage))+ geom_jitter(size = 2) + #Size and alpha just for fun geom_text_repel(aes(label = name)) + theme_bw(base_size = 15) + xlab(paste("PC1(", round(pca_res$eig[,2][1]), "%)", sep = "")) + ylab(paste("PC2(", round(pca_res$eig[,2][2]), "%)", sep = "")) + scale_color_manual(values = RColorBrewer::brewer.pal(n = 3, name = "Dark2")[1:2]) + theme(legend.position = "top") ggsave("/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/m6A_AS/m6A_PCA.pdf", width = 4.2, height = 4.2)
Meth <- fread("/mnt/raid61/Personal_data/songjunwei/DRS_RTA/batch4_f5_fq/barcodediff_res.csv")[, -1] Meth[, ratio := as.numeric(ratio)] # hv <- Meth[, sd(ratio, na.rm = T), name][!is.na(V1)][order(V1, decreasing = T)][1:50, name] MethTab <- dcast(data = Meth, formula = name + gene ~ barcode, value.var = "ratio") # MethTab <- MethTab[name %in% hv] MethTab <- data.frame(MethTab[, -c(1:2)], row.names = paste0(MethTab[[1]], "_", MethTab[[2]])) hv <- data.table(ID = rownames(MethTab), sd = apply(MethTab, 1, sd, na.rm = T)) hv <- hv[!is.na(sd)][order(sd, decreasing = T)][1:500, ID] length(hv) MethTab[is.na(MethTab)] <- 0 library(FactoMineR) pca_res <- PCA(t(MethTab[hv, ]), ncp = 3, graph = F) library(FactoMineR) pca_result <- data.frame(pca_res$svd$U, Name = colnames(MethTab)) pca_result$name <- gsub("RTA", "RTA-", pca_result$Name) pca_result$Stage <- plyr::mapvalues(pca_result$Name, c("RTA03", "RTA10", "RTA16", "RTA17", "RTA24", "RTA32"), c("Trophozoite", "Trophozoite", "Trophozoite", "Schizont", "Schizont", "Schizont")) ggplot(pca_result, aes(x = X1, y = X2, color = Stage))+ geom_jitter(size = 2) + #Size and alpha just for fun geom_text_repel(aes(label = name)) + theme_bw(base_size = 15) + xlab(paste("PC1(", round(pca_res$eig[,2][1]), "%)", sep = "")) + ylab(paste("PC2(", round(pca_res$eig[,2][2]), "%)", sep = "")) + scale_color_manual(values = RColorBrewer::brewer.pal(n = 3, name = "Dark2")[1:2]) + theme(legend.position = "top") ggsave("/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/m6A_AS/m6A_PCA_top500.pdf", width = 4.2, height = 4.2)
files <- list.files("/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/suppa2", "ioe", full.names = T) files_03 <- grep("RTA-03", files, value = T) files_17 <- grep("RTA-17", files, value = T)
event_03 <- lapply(files_03, fread) names(event_03) <- mapply(function(x) x[2], strsplit(basename(files_03), "_")) event_03 <- event_03[mapply(nrow, event_03) > 0] event_17 <- lapply(files_17, fread) names(event_17) <- mapply(function(x) x[2], strsplit(basename(files_17), "_")) event_17 <- event_17[mapply(nrow, event_17) > 0]
AS_N <- rbind(data.table(Stage = "Trophozoite", AS = names(event_03), N = mapply(nrow, event_03)), data.table(Stage = "Schizont", AS = names(event_17), N = mapply(nrow, event_17))) AS_N <- AS_N[, .SD[, .(AS, N, P = prop.table(N) * 100)], by = "Stage"] AS_N[, L := paste0(N, "\n(", round(P, 1), "%)")]
ggplot(AS_N, aes(x = AS, y = N, fill = Stage)) + geom_col(position = "dodge") + geom_text(aes(label = L, y = N + 30), position = position_dodge2(width = 0.9), size = 3) + # scale_y_log10() + scale_y_continuous(limits = c(0, 320)) + theme_bw(base_size = 15) + labs(y = "No. events") + theme(legend.position = "top") + scale_fill_manual(values = RColorBrewer::brewer.pal(n = 3, name = "Dark2")[1:2]) ggsave("/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/m6A_AS/AS_Event.pdf", width = 6, height = 4)
AS_N <- rbind(data.table(Stage = "Schizont", AS = c("A3", "A5", "AF", "RI", "SE"), N = c(15, 28, 7, 276, 10), P = c(4.5, 8.3, 2.1, 82.1, 3)), data.table(Stage = "Trophozoite", AS = c("A3", "A5", "AF", "RI", "SE"), N = c(10, 11, 3, 201, 9), P = c(4.3, 4.7, 1.3, 85.9, 3.8))) AS_N[, Stage := factor(Stage, levels = c("Trophozoite", "Schizont"))] AS_N[, AS := factor(AS, levels = AS_N[, mean(P), AS][order(V1, decreasing = T), AS])]
ggplot(AS_N, aes(x = AS, y = P, fill = Stage)) + geom_col(position = "dodge") + geom_text(aes(label = N, y = P + 2), position = position_dodge2(width = 0.9), size = 3) + theme_bw(base_size = 15) + labs(y = "Percentage (%)") + theme(legend.position = "top") + scale_fill_manual(values = RColorBrewer::brewer.pal(n = 3, name = "Dark2")[2:1]) ggsave("/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/analysis/09.AlternativeSplicing/01.Plasmodium/20211025/AS_Event_V2.pdf", width = 6, height = 4)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.